Introduction

Like all of its competitors, the Canadian insurance company AssurancExpertsInc seeks to target the best customer profiles for its different insurance categories as part of its marketing strategies. Our study fits within this framework: we analyse the company's customer data in order to identify the best customer profiles to target in marketing campaigns for a caravan insurance policy.

Business understanding

Problem statement:

E-mail marketing campaigns, although effective with interested customers, can annoy the customers who are not interested. In that case the strategy becomes counter-productive and can harm the insurer in terms of both its customer base and wasted resources.

Objectives:

- Target customers for a marketing campaign.
- Target the sending of personalised e-mails.
- Encourage the identified profiles to request caravan insurance.

Data science goals:

- Extract the profiles of customers potentially interested in caravan insurance.
- Build a priori knowledge about future customers.

Data understanding

Loading the libraries

library(DMwR)
library(adabag)
library(ggplot2)
library(corrplot)
library(readxl)
library(caret)
library(resample)
library(randomForest)
library(mlbench)
library(rpart.plot)
library(rpart)
library(smotefamily)
library(ROCR)
library(car)
library(MASS)
library(tfestimators)
library(RODBC)
library(plotly)
library(e1071)

Importing the “AssurancExpertsINC” dataset

# Read the tab-separated file chosen interactively; empty strings become NA
dataAssurance <- read.table(file.choose(), sep = "\t", dec = ".", na.strings = "", header = TRUE)
data_assurance <- dataAssurance  # keep an untouched copy of the raw data
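
Since na.strings = "" turns empty fields into NA, a quick missing-value check is a reasonable first step (a minimal sketch, not part of the original output):

sum(is.na(dataAssurance))                 # total number of missing cells
which(colSums(is.na(dataAssurance)) > 0)  # columns that contain any NA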

Visualising our dataset

dim(dataAssurance)
## [1] 9822   87
head(dataAssurance,n = 5)
##   SD1 SD2 SD3 SD4 SD5 SD6 SD7 SD8 SD9 SD10 SD11 SD12 SD13 SD14 SD15 SD16
## 1  33   1   3   2   8   0   5   1   3    7    0    2    1    2    6    1
## 2  37   1   2   2   8   1   4   1   4    6    2    2    0    4    5    0
## 3  37   1   2   2   8   0   4   2   4    3    2    4    4    4    2    0
## 4   9   1   3   3   3   2   3   2   4    5    2    2    2    3    4    3
## 5  40   1   4   2  10   1   4   1   4    7    1    2    2    4    4    5
##   SD17 SD18 SD19 SD20 SD21 SD22 SD23 SD24 SD25 SD26 SD27 SD28 SD29 SD30
## 1    2    7    1    0    1    2    5    2    1    1    2    6    1    1
## 2    5    4    0    0    0    5    0    4    0    2    3    5    0    2
## 3    5    4    0    0    0    7    0    2    0    5    0    4    0    7
## 4    4    2    4    0    0    3    1    2    3    2    1    4    0    5
## 5    4    0    0    5    4    0    0    0    9    0    0    0    0    4
##   SD31 SD32 SD33 SD34 SD35 SD36 SD37 SD38 SD39 SD40 SD41 SD42 SD43 PO44
## 1    8    8    0    1    8    1    0    4    5    0    0    4    3    0
## 2    7    7    1    2    6    3    2    0    5    2    0    5    4    2
## 3    2    7    0    2    9    0    4    5    0    0    0    3    4    2
## 4    4    9    0    0    7    2    1    5    3    0    0    4    4    0
## 5    5    6    2    1    5    4    0    0    9    0    0    6    3    0
##   PO45 PO46 PO47 PO48 PO49 PO50 PO51 PO52 PO53 PO54 PO55 PO56 PO57 PO58
## 1    0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 2    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## 3    0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 4    0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 5    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   PO59 PO60 PO61 PO62 PO63 PO64 PO65 PO66 PO67 PO68 PO69 PO70 PO71 PO72
## 1    5    0    0    0    0    0    0    0    0    1    0    0    0    0
## 2    2    0    0    0    0    0    2    0    0    0    0    0    0    0
## 3    2    0    0    0    0    0    1    0    0    1    0    0    0    0
## 4    2    0    0    0    0    0    0    0    0    1    0    0    0    0
## 5    6    0    0    0    0    0    0    0    0    0    0    0    0    0
##   PO73 PO74 PO75 PO76 PO77 PO78 PO79 PO80 PO81 PO82 PO83 PO84 PO85 CLASS
## 1    0    0    0    0    0    0    0    1    0    0    0    0    0    No
## 2    0    0    0    0    0    0    0    1    0    0    0    0    0    No
## 3    0    0    0    0    0    0    0    1    0    0    0    0    0    No
## 4    0    0    0    0    0    0    0    1    0    0    0    0    0    No
## 5    0    0    0    0    0    0    0    1    0    0    0    0    0    No
##     STATUS
## 1 Learning
## 2 Learning
## 3 Learning
## 4 Learning
## 5 Learning
summary(dataAssurance)
##       SD1             SD2              SD3             SD4       
##  Min.   : 1.00   Min.   : 1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:10.00   1st Qu.: 1.000   1st Qu.:2.000   1st Qu.:2.000  
##  Median :30.00   Median : 1.000   Median :3.000   Median :3.000  
##  Mean   :24.25   Mean   : 1.109   Mean   :2.678   Mean   :2.996  
##  3rd Qu.:35.00   3rd Qu.: 1.000   3rd Qu.:3.000   3rd Qu.:3.000  
##  Max.   :41.00   Max.   :10.000   Max.   :6.000   Max.   :6.000  
##       SD5              SD6              SD7             SD8      
##  Min.   : 1.000   Min.   :0.0000   Min.   :0.000   Min.   :0.00  
##  1st Qu.: 3.000   1st Qu.:0.0000   1st Qu.:4.000   1st Qu.:0.00  
##  Median : 7.000   Median :0.0000   Median :5.000   Median :1.00  
##  Mean   : 5.779   Mean   :0.7007   Mean   :4.638   Mean   :1.05  
##  3rd Qu.: 8.000   3rd Qu.:1.0000   3rd Qu.:6.000   3rd Qu.:2.00  
##  Max.   :10.000   Max.   :9.0000   Max.   :9.000   Max.   :5.00  
##       SD9             SD10            SD11             SD12      
##  Min.   :0.000   Min.   :0.000   Min.   :0.0000   Min.   :0.000  
##  1st Qu.:2.000   1st Qu.:5.000   1st Qu.:0.0000   1st Qu.:1.000  
##  Median :3.000   Median :6.000   Median :1.0000   Median :2.000  
##  Mean   :3.263   Mean   :6.189   Mean   :0.8731   Mean   :2.287  
##  3rd Qu.:4.000   3rd Qu.:7.000   3rd Qu.:1.0000   3rd Qu.:3.000  
##  Max.   :9.000   Max.   :9.000   Max.   :7.0000   Max.   :9.000  
##       SD13            SD14            SD15            SD16      
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:2.000   1st Qu.:3.000   1st Qu.:0.000  
##  Median :2.000   Median :3.000   Median :4.000   Median :1.000  
##  Mean   :1.887   Mean   :3.237   Mean   :4.303   Mean   :1.485  
##  3rd Qu.:3.000   3rd Qu.:4.000   3rd Qu.:6.000   3rd Qu.:2.000  
##  Max.   :9.000   Max.   :9.000   Max.   :9.000   Max.   :9.000  
##       SD17            SD18            SD19            SD20       
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.:2.000   1st Qu.:3.000   1st Qu.:0.000   1st Qu.:0.0000  
##  Median :3.000   Median :5.000   Median :2.000   Median :0.0000  
##  Mean   :3.307   Mean   :4.592   Mean   :1.899   Mean   :0.4033  
##  3rd Qu.:4.000   3rd Qu.:6.000   3rd Qu.:3.000   3rd Qu.:1.0000  
##  Max.   :9.000   Max.   :9.000   Max.   :9.000   Max.   :5.0000  
##       SD21             SD22            SD23            SD24      
##  Min.   :0.0000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.0000   1st Qu.:2.000   1st Qu.:1.000   1st Qu.:1.000  
##  Median :0.0000   Median :3.000   Median :2.000   Median :2.000  
##  Mean   :0.5457   Mean   :2.877   Mean   :2.227   Mean   :2.291  
##  3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:3.000   3rd Qu.:3.000  
##  Max.   :9.0000   Max.   :9.000   Max.   :9.000   Max.   :9.000  
##       SD25            SD26            SD27            SD28      
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:2.000  
##  Median :1.000   Median :2.000   Median :2.000   Median :4.000  
##  Mean   :1.651   Mean   :1.595   Mean   :2.205   Mean   :3.742  
##  3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:3.000   3rd Qu.:5.000  
##  Max.   :9.000   Max.   :9.000   Max.   :9.000   Max.   :9.000  
##       SD29            SD30            SD31            SD32      
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.:5.000  
##  Median :1.000   Median :4.000   Median :5.000   Median :6.000  
##  Mean   :1.068   Mean   :4.188   Mean   :4.819   Mean   :6.023  
##  3rd Qu.:2.000   3rd Qu.:7.000   3rd Qu.:7.000   3rd Qu.:7.000  
##  Max.   :9.000   Max.   :9.000   Max.   :9.000   Max.   :9.000  
##       SD33            SD34            SD35            SD36      
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:5.000   1st Qu.:1.000  
##  Median :1.000   Median :2.000   Median :7.000   Median :2.000  
##  Mean   :1.336   Mean   :1.957   Mean   :6.254   Mean   :2.751  
##  3rd Qu.:2.000   3rd Qu.:3.000   3rd Qu.:8.000   3rd Qu.:4.000  
##  Max.   :9.000   Max.   :9.000   Max.   :9.000   Max.   :9.000  
##       SD37            SD38            SD39            SD40       
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.:1.000   1st Qu.:2.000   1st Qu.:1.000   1st Qu.:0.0000  
##  Median :2.000   Median :4.000   Median :3.000   Median :0.0000  
##  Mean   :2.577   Mean   :3.505   Mean   :2.739   Mean   :0.8085  
##  3rd Qu.:4.000   3rd Qu.:5.000   3rd Qu.:4.000   3rd Qu.:1.0000  
##  Max.   :9.000   Max.   :9.000   Max.   :9.000   Max.   :9.0000  
##       SD41            SD42            SD43           PO44       
##  Min.   :0.000   Min.   :0.000   Min.   :1.00   Min.   :0.0000  
##  1st Qu.:0.000   1st Qu.:3.000   1st Qu.:3.00   1st Qu.:0.0000  
##  Median :0.000   Median :4.000   Median :4.00   Median :0.0000  
##  Mean   :0.208   Mean   :3.805   Mean   :4.26   Mean   :0.7649  
##  3rd Qu.:0.000   3rd Qu.:4.000   3rd Qu.:6.00   3rd Qu.:2.0000  
##  Max.   :9.000   Max.   :9.000   Max.   :8.00   Max.   :3.0000  
##       PO45              PO46              PO47            PO48        
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :5.000   Median :0.00000  
##  Mean   :0.03889   Mean   :0.07371   Mean   :2.956   Mean   :0.05488  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:6.000   3rd Qu.:0.00000  
##  Max.   :6.00000   Max.   :4.00000   Max.   :9.000   Max.   :7.00000  
##       PO49             PO50               PO51              PO52        
##  Min.   :0.0000   Min.   :0.000000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.0000   1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.0000   Median :0.000000   Median :0.00000   Median :0.00000  
##  Mean   :0.1708   Mean   :0.008858   Mean   :0.01934   Mean   :0.09356  
##  3rd Qu.:0.0000   3rd Qu.:0.000000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :7.0000   Max.   :9.000000   Max.   :5.00000   Max.   :7.00000  
##       PO53             PO54            PO55             PO56       
##  Min.   :0.0000   Min.   :0.000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.000   Median :0.0000   Median :0.0000  
##  Mean   :0.0115   Mean   :0.215   Mean   :0.2023   Mean   :0.0115  
##  3rd Qu.:0.0000   3rd Qu.:0.000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :6.0000   Max.   :6.000   Max.   :9.0000   Max.   :6.0000  
##       PO57              PO58              PO59            PO60         
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.000   Min.   :0.000000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.000   1st Qu.:0.000000  
##  Median :0.00000   Median :0.00000   Median :2.000   Median :0.000000  
##  Mean   :0.01873   Mean   :0.02331   Mean   :1.849   Mean   :0.001629  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:4.000   3rd Qu.:0.000000  
##  Max.   :3.00000   Max.   :7.00000   Max.   :8.000   Max.   :3.000000  
##       PO61              PO62              PO63             PO64        
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.0000   Median :0.00000  
##  Mean   :0.01527   Mean   :0.02535   Mean   :0.0167   Mean   :0.04541  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :6.00000   Max.   :1.00000   Max.   :6.0000   Max.   :5.00000  
##       PO65          PO66              PO67              PO68        
##  Min.   :0.0   Min.   :0.00000   Min.   :0.00000   Min.   : 0.0000  
##  1st Qu.:0.0   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.: 0.0000  
##  Median :0.0   Median :0.00000   Median :0.00000   Median : 1.0000  
##  Mean   :0.4   Mean   :0.01405   Mean   :0.02128   Mean   : 0.5572  
##  3rd Qu.:1.0   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.: 1.0000  
##  Max.   :2.0   Max.   :5.00000   Max.   :1.00000   Max.   :12.0000  
##       PO69             PO70              PO71              PO72       
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.00000   Median :0.00000   Median :0.0000  
##  Mean   :0.0111   Mean   :0.04022   Mean   :0.00224   Mean   :0.0114  
##  3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.0000  
##  Max.   :5.0000   Max.   :8.00000   Max.   :4.00000   Max.   :3.0000  
##       PO73              PO74               PO75              PO76        
##  Min.   :0.00000   Min.   :0.000000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.000000   Median :0.00000   Median :0.00000  
##  Mean   :0.03441   Mean   :0.005192   Mean   :0.07107   Mean   :0.07982  
##  3rd Qu.:0.00000   3rd Qu.:0.000000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :6.00000   Max.   :6.000000   Max.   :3.00000   Max.   :8.00000  
##       PO77               PO78               PO79               PO80      
##  Min.   :0.000000   Min.   :0.000000   Min.   :0.000000   Min.   :0.000  
##  1st Qu.:0.000000   1st Qu.:0.000000   1st Qu.:0.000000   1st Qu.:0.000  
##  Median :0.000000   Median :0.000000   Median :0.000000   Median :1.000  
##  Mean   :0.004582   Mean   :0.007941   Mean   :0.004276   Mean   :0.574  
##  3rd Qu.:0.000000   3rd Qu.:0.000000   3rd Qu.:0.000000   3rd Qu.:1.000  
##  Max.   :1.000000   Max.   :1.000000   Max.   :2.000000   Max.   :7.000  
##       PO81                PO82               PO83        
##  Min.   :0.0000000   Min.   :0.000000   Min.   :0.00000  
##  1st Qu.:0.0000000   1st Qu.:0.000000   1st Qu.:0.00000  
##  Median :0.0000000   Median :0.000000   Median :0.00000  
##  Mean   :0.0009163   Mean   :0.005091   Mean   :0.03146  
##  3rd Qu.:0.0000000   3rd Qu.:0.000000   3rd Qu.:0.00000  
##  Max.   :1.0000000   Max.   :2.000000   Max.   :4.00000  
##       PO84              PO85         CLASS           STATUS    
##  Min.   :0.00000   Min.   :0.00000   No :9236   Learning:5822  
##  1st Qu.:0.00000   1st Qu.:0.00000   Yes: 586   Test    :4000  
##  Median :0.00000   Median :0.00000                             
##  Mean   :0.00845   Mean   :0.01385                             
##  3rd Qu.:0.00000   3rd Qu.:0.00000                             
##  Max.   :2.00000   Max.   :2.00000
str(dataAssurance)
## 'data.frame':    9822 obs. of  87 variables:
##  $ SD1   : int  33 37 37 9 40 23 39 33 33 11 ...
##  $ SD2   : int  1 1 1 1 1 1 2 1 1 2 ...
##  $ SD3   : int  3 2 2 3 4 2 3 2 2 3 ...
##  $ SD4   : int  2 2 2 3 2 1 2 3 4 3 ...
##  $ SD5   : int  8 8 8 3 10 5 9 8 8 3 ...
##  $ SD6   : int  0 1 0 2 1 0 2 0 0 3 ...
##  $ SD7   : int  5 4 4 3 4 5 2 7 1 5 ...
##  $ SD8   : int  1 1 2 2 1 0 0 0 3 0 ...
##  $ SD9   : int  3 4 4 4 4 5 5 2 6 2 ...
##  $ SD10  : int  7 6 3 5 7 0 7 7 6 7 ...
##  $ SD11  : int  0 2 2 2 1 6 2 2 0 0 ...
##  $ SD12  : int  2 2 4 2 2 3 0 0 3 2 ...
##  $ SD13  : int  1 0 4 2 2 3 0 0 3 2 ...
##  $ SD14  : int  2 4 4 3 4 5 3 5 3 2 ...
##  $ SD15  : int  6 5 2 4 4 2 6 4 3 6 ...
##  $ SD16  : int  1 0 0 3 5 0 0 0 0 0 ...
##  $ SD17  : int  2 5 5 4 4 5 4 3 1 4 ...
##  $ SD18  : int  7 4 4 2 0 4 5 6 8 5 ...
##  $ SD19  : int  1 0 0 4 0 2 0 2 1 2 ...
##  $ SD20  : int  0 0 0 0 5 0 0 0 1 0 ...
##  $ SD21  : int  1 0 0 0 4 0 0 0 0 0 ...
##  $ SD22  : int  2 5 7 3 0 4 4 2 1 3 ...
##  $ SD23  : int  5 0 0 1 0 2 1 5 8 3 ...
##  $ SD24  : int  2 4 2 2 0 2 5 2 1 3 ...
##  $ SD25  : int  1 0 0 3 9 2 0 2 1 1 ...
##  $ SD26  : int  1 2 5 2 0 2 1 1 1 2 ...
##  $ SD27  : int  2 3 0 1 0 2 4 2 0 1 ...
##  $ SD28  : int  6 5 4 4 0 4 5 5 8 4 ...
##  $ SD29  : int  1 0 0 0 0 2 0 2 1 2 ...
##  $ SD30  : int  1 2 7 5 4 9 6 0 9 0 ...
##  $ SD31  : int  8 7 2 4 5 0 3 9 0 9 ...
##  $ SD32  : int  8 7 7 9 6 5 8 4 5 6 ...
##  $ SD33  : int  0 1 0 0 2 3 0 4 2 1 ...
##  $ SD34  : int  1 2 2 0 1 3 1 2 3 2 ...
##  $ SD35  : int  8 6 9 7 5 9 9 6 7 6 ...
##  $ SD36  : int  1 3 0 2 4 0 0 3 2 3 ...
##  $ SD37  : int  0 2 4 1 0 5 4 2 7 2 ...
##  $ SD38  : int  4 0 5 5 0 2 3 5 2 3 ...
##  $ SD39  : int  5 5 0 3 9 3 3 3 1 3 ...
##  $ SD40  : int  0 2 0 0 0 0 0 0 0 1 ...
##  $ SD41  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ SD42  : int  4 5 3 4 6 3 3 3 2 4 ...
##  $ SD43  : int  3 4 4 4 3 3 5 3 3 7 ...
##  $ PO44  : int  0 2 2 0 0 0 0 0 0 2 ...
##  $ PO45  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO46  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO47  : int  6 0 6 6 0 6 6 0 5 0 ...
##  $ PO48  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO49  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO50  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO51  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO52  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO53  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO54  : int  0 0 0 0 0 0 0 3 0 0 ...
##  $ PO55  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO56  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO57  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO58  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO59  : int  5 2 2 2 6 0 0 0 0 3 ...
##  $ PO60  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO61  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO62  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO63  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO64  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO65  : int  0 2 1 0 0 0 0 0 0 1 ...
##  $ PO66  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO67  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO68  : int  1 0 1 1 0 1 1 0 1 0 ...
##  $ PO69  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO70  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO71  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO72  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO73  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO74  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO75  : int  0 0 0 0 0 0 0 1 0 0 ...
##  $ PO76  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO77  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO78  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO79  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO80  : int  1 1 1 1 1 0 0 0 0 1 ...
##  $ PO81  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO82  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO83  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO84  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PO85  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ CLASS : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ STATUS: Factor w/ 2 levels "Learning","Test": 1 1 1 1 1 1 1 1 1 1 ...

Visualising the correlations between the socio-demographic variables

dd <- dataAssurance[, 1:43]   # socio-demographic attributes SD1–SD43
cc <- cor(dd)
corrplot(t(cc), method = "ellipse")

Visualising the correlations between the product-ownership variables

dd <- dataAssurance[, 44:85]  # product-ownership attributes PO44–PO85
cc <- cor(dd)
corrplot(t(cc), method = "ellipse")
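
To complement the correlation plots, the most strongly correlated pairs can be listed programmatically; the 0.7 cut-off below is an arbitrary illustration, applied to the matrix cc computed above:

# upper-triangle cells with |r| above the chosen threshold
high <- which(abs(cc) > 0.7 & upper.tri(cc), arr.ind = TRUE)
data.frame(var1 = rownames(cc)[high[, 1]],
           var2 = colnames(cc)[high[, 2]],
           r    = round(cc[high], 2))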

The number of customers who do or do not want caravan insurance

summary(dataAssurance$CLASS)
##   No  Yes 
## 9236  586
plot(dataAssurance$CLASS)

We can see that the number of customers who willingly want caravan insurance is much lower than the number of customers who do not want this type of insurance.
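
The imbalance is easy to quantify: with 586 Yes out of 9822 customers, only about 6% of the sample is positive.

round(prop.table(table(dataAssurance$CLASS)), 3)  # share of No / Yes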

boxplot(x = dataAssurance[, 1:85])  # numeric attributes only; CLASS and STATUS are factors

The socio-demographic attributes:

ggplot(data = dataAssurance, aes(x = SD1)) +
  geom_histogram(breaks = seq(1, 41, by = 1), aes(fill = ..count..)) +
  scale_fill_gradient("Count", low = "green", high = "red") +
  ggtitle("Histogram of customer subtype") +
  labs(x = "Subtype", y = "Count")

“Lower class large families” dominates our sample.

# subtype 33 = "Lower class large families"
a <- dataAssurance[which(dataAssurance$SD1 == 33), ]
df <- data.frame(
  group = a$CLASS,
  value = a$SD1
)

ggplot(df, aes(x = factor(1), fill = group)) +
  geom_bar(width = 1) +
  coord_polar("y")

Most individuals of the “Lower class large families” type answered No to caravan insurance.
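
The share of Yes answers within this subtype can be checked directly on the subset a defined above:

prop.table(table(a$CLASS))  # proportion of No / Yes among subtype 33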

Subsetting the dataset to keep the individuals who answered YES

y <- dataAssurance[which(dataAssurance$CLASS=="Yes"),]
head(y,n=50)
##     SD1 SD2 SD3 SD4 SD5 SD6 SD7 SD8 SD9 SD10 SD11 SD12 SD13 SD14 SD15 SD16
## 42   11   1   3   3   3   2   7   0   0    9    0    0    2    3    4    0
## 46   38   1   3   3   9   0   5   1   3    7    1    2    3    2    5    1
## 58   12   1   3   2   3   0   6   0   3    7    2    0    1    2    6    2
## 98    9   1   2   3   3   0   6   1   2    7    1    1    5    1    4    5
## 99   36   1   2   4   8   2   4   2   2    7    0    2    2    4    3    1
## 128   3   1   2   4   1   0   4   4   2    6    0    3    3    4    3    7
## 150   8   2   3   3   2   0   5   2   3    7    1    2    2    3    5    3
## 151   8   1   4   3   2   1   5   1   3    7    1    2    0    3    6    3
## 175  34   1   3   2   8   0   7   1   2    8    1    0    0    4    5    0
## 180  41   1   3   3  10   0   7   0   3    8    1    1    1    3    5    1
## 195  36   1   3   3   8   0   7   0   2    7    0    2    0    4    5    0
## 204  38   1   3   2   9   1   0   1   7    4    1    4    1    1    8    4
## 227  10   1   3   3   3   0   9   0   0    7    0    2    2    3    5    5
## 235  38   1   3   2   9   0   6   0   3    5    2    2    2    2    5    0
## 236   8   1   3   3   2   1   7   0   1    6    0    3    2    4    4    4
## 247   1   1   4   3   1   1   5   1   3    6    1    2    0    2    7    8
## 249  33   1   2   3   8   0   7   0   2    7    2    0    0    5    4    0
## 254  38   1   3   2   9   2   2   2   4    7    0    2    1    1    8    3
## 282   8   1   2   4   2   0   7   0   2    9    0    0    2    5    3    0
## 298   8   1   3   3   2   1   5   1   3    7    1    2    0    2    7    5
## 314  13   1   3   3   3   0   6   0   3    6    0    3    2    4    4    4
## 319  35   1   2   3   8   1   5   1   3    5    1    4    3    3    4    2
## 336  12   1   4   2   3   0   4   0   5    9    0    0    0    2    7    0
## 337  38   1   4   2   9   0   2   3   5    6    0    3    2    0    7    0
## 357   8   2   4   3   2   0   5   0   5    9    0    0    0    0    9    0
## 401  31   1   3   3   7   2   3   0   5    4    2    4    3    2    5    0
## 404  34   1   3   2   8   1   5   1   4    6    1    3    2    1    6    3
## 427  38   2   3   2   9   0   6   0   3    6    2    2    1    4    5    1
## 440   6   1   3   3   2   0   7   2   0    9    0    0    0    3    6    6
## 445  33   1   3   3   8   1   4   1   5    7    1    2    1    4    4    2
## 481  32   1   2   3   7   0   5   1   3    6    0    3    3    3    3    0
## 505  12   1   3   2   3   1   3   0   5    7    1    2    0    3    6    2
## 511   8   1   3   3   2   2   5   2   1    7    2    0    0    2    7    4
## 553   3   1   1   2   1   0   5   0   4    0    3    6    6    3    0    3
## 561   8   1   3   3   2   2   3   0   5    7    2    0    0    5    4    1
## 621  39   1   2   2   9   0   3   2   5    5    3    2    1    6    2    1
## 627  33   1   3   3   8   1   4   1   5    7    1    2    1    4    4    2
## 662   8   1   3   3   2   1   5   1   3    7    1    2    0    2    7    5
## 670  38   1   2   2   9   0   4   2   4    7    1    2    3    4    3    1
## 671  24   1   2   2   5   0   4   0   5    3    2    4    4    3    2    1
## 685   8   1   3   3   2   1   6   2   1    7    0    2    1    2    6    4
## 705  33   1   2   3   8   1   4   1   4    6    2    3    3    4    3    1
## 763  32   1   3   3   7   0   6   0   3    7    0    2    0    5    4    2
## 771  33   1   2   3   8   1   4   1   4    6    2    3    3    4    3    1
## 774  38   1   3   2   9   0   4   0   5    9    0    0    0    2    7    1
## 800   1   1   2   4   1   0   2   0   7    6    0    3    5    3    2    3
## 812   3   1   2   3   1   1   5   4   0    9    0    0    1    6    3    4
## 816   8   1   3   3   2   1   7   0   1    6    0    3    2    4    4    4
## 835  12   2   4   2   3   0   2   0   7    9    0    0    0    0    9    0
## 838   3   1   2   3   1   1   5   1   3    6    1    2    0    5    4    4
##     SD17 SD18 SD19 SD20 SD21 SD22 SD23 SD24 SD25 SD26 SD27 SD28 SD29 SD30
## 42     5    4    5    0    0    4    0    1    1    2    4    2    2    1
## 46     3    6    2    1    1    2    3    3    1    1    1    5    2    5
## 58     4    4    1    0    0    5    2    1    0    3    2    4    0    0
## 98     4    0    4    0    0    5    1    0    1    8    0    0    0    9
## 99     5    4    1    0    0    3    1    4    1    1    4    4    0    2
## 128    2    0    7    0    0    2    0    1    7    2    0    0    0    0
## 150    4    3    4    0    0    4    1    0    4    2    2    2    0    1
## 151    5    1    3    1    0    4    1    2    3    3    2    2    1    1
## 175    4    5    0    0    0    5    1    4    0    1    5    4    0    4
## 180    4    5    1    1    3    2    3    1    2    1    3    4    0    2
## 195    0    9    0    0    0    2    7    0    0    0    0    9    0    9
## 204    4    2    4    1    0    2    2    3    2    1    5    2    0    6
## 227    2    3    3    0    0    3    2    1    2    3    3    2    0    0
## 235    2    7    0    0    0    2    7    0    0    0    0    9    0    2
## 236    3    2    3    0    0    5    1    1    3    2    3    2    0    0
## 247    1    1    6    0    0    2    2    1    5    3    2    1    0    0
## 249    3    6    2    0    0    2    5    2    2    1    2    5    2    0
## 254    2    5    1    0    0    5    1    3    1    1    4    3    2    4
## 282    4    5    0    0    0    4    1    5    0    0    5    4    1    9
## 298    3    2    3    1    0    4    2    1    4    3    2    2    1    1
## 314    3    2    7    0    0    0    2    0    7    0    0    2    0    0
## 319    2    6    1    1    1    2    4    2    1    1    2    5    1    4
## 336    5    4    2    0    0    2    3    3    1    2    2    4    1    1
## 337    6    3    0    0    0    7    0    2    0    3    4    3    0    4
## 357    5    5    0    0    0    9    0    0    0    4    0    5    0    0
## 401    4    5    0    0    0    2    2    5    0    2    3    3    2    9
## 404    3    4    2    0    0    5    1    2    2    2    3    4    0    1
## 427    6    3    1    0    0    3    3    3    1    3    2    5    0    4
## 440    3    0    6    2    0    2    0    0    7    2    0    0    0    0
## 445    3    4    3    0    2    2    3    1    3    1    3    3    2    3
## 481    1    8    0    0    0    4    0    5    0    1    1    5    3    9
## 505    4    3    3    0    0    3    0    3    3    1    4    3    0    4
## 511    4    2    3    0    0    5    2    1    2    3    3    2    1    0
## 553    3    3    0    0    0    6    0    3    0    4    2    2    2    7
## 561    8    1    3    0    0    4    1    3    1    2    4    4    0    5
## 621    4    4    1    1    1    6    1    2    1    2    2    5    0    8
## 627    3    4    3    0    2    2    3    1    3    1    3    3    2    3
## 662    3    2    3    1    0    4    2    1    4    3    2    2    1    1
## 670    1    8    1    0    0    3    6    0    1    1    1    8    0    3
## 671    6    2    0    0    0    4    2    3    0    2    3    5    0    7
## 685    3    2    5    1    0    2    1    1    5    2    1    2    0    2
## 705    4    5    1    1    0    3    2    4    1    2    2    5    1    5
## 763    4    4    2    2    0    3    3    2    3    2    0    4    0    2
## 771    4    5    1    1    0    3    2    4    1    2    2    5    1    5
## 774    2    6    1    0    0    3    3    3    1    1    3    6    1    5
## 800    4    2    7    0    0    0    2    0    4    0    4    1    0    0
## 812    3    2    6    0    0    2    2    0    5    2    3    1    0    0
## 816    3    2    3    0    0    5    1    1    3    2    3    2    0    0
## 835    3    6    0    0    0    7    0    2    0    2    0    6    2    0
## 838    0    5    2    1    1    2    3    2    3    0    6    0    0    0
##     SD31 SD32 SD33 SD34 SD35 SD36 SD37 SD38 SD39 SD40 SD41 SD42 SD43 PO44
## 42     8    7    2    0    7    2    2    4    3    0    1    4    6    2
## 46     4    5    2    2    7    2    4    4    1    1    1    3    4    2
## 58     9    9    0    0    5    4    2    4    4    0    0    4    7    2
## 98     0    8    1    1    4    5    0    5    4    0    0    5    4    0
## 99     7    5    3    2    5    4    2    5    3    0    0    4    3    0
## 128    9    5    4    0    3    6    0    3    6    0    0    4    6    2
## 150    8    8    0    1    6    3    0    5    4    0    0    4    7    0
## 151    8    7    0    2    5    4    1    3    5    1    1    5    7    0
## 175    5    8    1    0    9    0    2    5    2    2    0    4    6    0
## 180    7    5    4    0    6    3    3    4    4    0    0    4    4    0
## 195    0    7    0    2    9    0    3    4    2    2    0    4    3    0
## 204    3    6    0    3    6    3    3    2    4    1    0    4    5    0
## 227    9    9    0    0    3    6    2    0    4    4    0    7    8    2
## 235    7    9    0    0    7    2    0    9    0    0    0    3    4    0
## 236    9    6    3    2    5    4    1    4    4    1    0    5    7    2
## 247    9    7    2    1    5    4    0    1    4    5    1    7    8    2
## 249    9    4    4    2    6    3    2    5    3    0    0    3    3    0
## 254    5    7    2    0    7    2    4    4    2    1    0    4    4    0
## 282    0    7    0    2    5    4    4    4    2    0    0    3    7    0
## 298    8    7    0    2    5    4    1    2    4    3    1    6    7    2
## 314    9    9    0    0    4    5    0    5    4    0    0    4    6    0
## 319    5    6    1    2    7    2    3    5    2    0    0    3    5    3
## 336    8    5    2    2    8    1    2    1    4    2    0    5    8    0
## 337    5    9    0    0    7    2    0    7    0    2    0    4    4    2
## 357    9    6    3    0    2    7    0    9    0    0    0    3    7    2
## 401    0    3    0    6    9    0    7    2    0    0    0    2    1    1
## 404    8    7    2    0    7    2    1    4    4    1    0    4    6    2
## 427    5    8    0    1    8    1    2    3    4    0    0    4    4    2
## 440    9    7    2    0    2    7    0    0    7    2    0    5    8    0
## 445    6    7    2    1    5    4    3    2    3    2    0    4    3    2
## 481    0    7    0    2    9    0    6    2    1    0    0    2    1    0
## 505    5    7    2    0    5    4    2    4    3    0    0    4    7    0
## 511    9    9    0    0    4    5    1    2    7    0    0    5    7    2
## 553    2    5    0    4    7    2    5    4    0    0    0    2    6    0
## 561    4    9    0    0    6    3    2    1    4    3    0    5    7    2
## 621    1    5    1    4    8    1    6    3    1    1    0    2    5    2
## 627    6    7    2    1    5    4    3    2    3    2    0    4    3    0
## 662    8    7    0    2    5    4    1    2    4    3    1    6    7    2
## 670    6    7    1    1    8    1    0    7    2    0    0    4    4    0
## 671    2    7    0    2    9    0    3    4    2    1    0    4    2    2
## 685    7    6    3    1    4    5    1    2    6    2    0    6    7    2
## 705    4    6    1    3    7    2    3    4    3    1    0    3    3    2
## 763    7    7    2    0    2    7    0    5    4    0    0    4    1    2
## 771    4    6    1    3    7    2    3    4    3    1    0    3    3    2
## 774    4    6    1    2    7    2    1    4    4    1    0    4    4    2
## 800    9    9    0    0    2    7    2    2    5    0    0    4    8    2
## 812    9    5    4    0    0    9    0    0    8    1    0    5    6    2
## 816    9    6    3    2    5    4    1    4    4    1    0    5    7    2
## 835    9    9    0    0    9    0    2    0    5    2    0    5    7    2
## 838    9    6    1    3    7    2    3    4    2    1    0    3    6    2
##     PO45 PO46 PO47 PO48 PO49 PO50 PO51 PO52 PO53 PO54 PO55 PO56 PO57 PO58
## 42     0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 46     0    0    6    0    5    0    0    0    0    0    0    0    0    0
## 58     0    0    0    0    0    0    0    0    0    0    0    0    0    6
## 98     0    0    0    0    0    0    0    0    0    0    0    0    0    0
## 99     0    0    0    0    0    0    0    0    0    0    0    0    0    0
## 128    0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 150    0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 151    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## 175    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## 180    0    3    6    0    0    0    0    0    0    0    0    0    0    0
## 195    0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 204    0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 227    0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 235    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## 236    0    0    6    0    0    0    2    0    0    0    0    0    0    0
## 247    0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 249    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## 254    3    0    6    0    0    0    0    5    0    0    0    0    0    6
## 282    0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 298    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## 314    0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 319    0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 336    0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 337    0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 357    0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 401    0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 404    0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 427    0    0    6    0    0    0    0    0    0    0    5    0    0    0
## 440    0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 445    0    0    6    0    0    0    0    0    0    0    0    0    3    0
## 481    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## 505    0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 511    0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 553    0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 561    0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 621    0    0    0    0    4    0    0    0    0    0    0    0    0    0
## 627    0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 662    0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 670    0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 671    0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 685    0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 705    0    0    6    0    5    0    0    0    0    0    3    0    0    0
## 763    0    0    0    0    0    0    2    0    0    0    0    0    0    0
## 771    0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 774    0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 800    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## 812    0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 816    0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 835    0    0    6    0    0    0    0    0    0    0    0    0    0    0
## 838    0    0    6    0    0    0    0    0    0    0    0    0    0    0
##     PO59 PO60 PO61 PO62 PO63 PO64 PO65 PO66 PO67 PO68 PO69 PO70 PO71 PO72
## 42     4    0    0    1    0    0    1    0    0    1    0    0    0    0
## 46     3    0    0    0    0    0    1    0    0    1    0    1    0    0
## 58     4    0    0    0    0    0    1    0    0    0    0    0    0    0
## 98     0    0    0    0    0    0    0    0    0    0    0    0    0    0
## 99     0    0    0    0    0    0    0    0    0    0    0    0    0    0
## 128    4    0    0    0    0    0    1    0    0    1    0    0    0    0
## 150    0    0    0    0    0    0    0    0    0    1    0    0    0    0
## 151    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## 175    3    0    0    0    0    0    0    0    0    0    0    0    0    0
## 180    6    0    1    0    0    0    0    0    1    1    0    0    0    0
## 195    0    0    0    0    0    0    0    0    0    1    0    0    0    0
## 204    3    0    0    0    0    0    0    0    0    1    0    0    0    0
## 227    4    0    0    0    0    0    1    0    0    1    0    0    0    0
## 235    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## 236    0    0    0    0    0    0    1    0    0    1    0    0    0    1
## 247    4    0    0    0    0    0    1    0    0    1    0    0    0    0
## 249    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## 254    0    0    0    0    0    0    0    1    0    2    0    0    0    0
## 282    3    0    0    0    0    4    0    0    0    1    0    0    0    0
## 298    4    0    0    0    0    0    1    0    0    0    0    0    0    0
## 314    0    0    0    0    0    0    0    0    0    1    0    0    0    0
## 319    4    0    0    0    0    0    1    0    0    2    0    0    0    0
## 336    0    0    0    0    0    0    0    0    0    1    0    0    0    0
## 337    4    0    0    0    0    3    1    0    0    1    0    0    0    0
## 357    4    0    0    0    0    0    1    0    0    1    0    0    0    0
## 401    3    0    0    0    0    0    1    0    0    1    0    0    0    0
## 404    4    0    0    0    0    0    1    0    0    1    0    0    0    0
## 427    4    0    0    0    0    0    1    0    0    1    0    0    0    0
## 440    0    0    0    0    0    0    0    0    0    1    0    0    0    0
## 445    4    0    0    0    0    0    1    0    0    1    0    0    0    0
## 481    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## 505    0    0    0    0    0    0    0    0    0    1    0    0    0    0
## 511    4    0    0    0    0    0    1    0    0    1    0    0    0    0
## 553    3    0    0    0    0    0    0    0    0    1    0    0    0    0
## 561    4    0    0    0    0    0    1    0    0    1    0    0    0    0
## 621    4    0    0    0    0    0    1    0    0    0    0    1    0    0
## 627    0    0    4    0    0    0    0    0    0    1    0    0    0    0
## 662    4    0    0    0    0    0    1    0    0    1    0    0    0    0
## 670    0    0    2    0    0    0    0    0    0    1    0    0    0    0
## 671    3    0    0    0    0    0    1    0    0    1    0    0    0    0
## 685    4    0    0    0    0    0    1    0    0    2    0    0    0    0
## 705    4    0    0    0    0    0    1    0    0    2    0    2    0    0
## 763    4    1    6    0    1    0    1    0    0    0    0    0    0    1
## 771    3    0    0    0    0    0    1    0    0    1    0    0    0    0
## 774    3    0    0    0    0    0    1    0    0    1    0    0    0    0
## 800    3    0    0    0    0    0    1    0    0    0    0    0    0    0
## 812    4    0    0    0    0    0    1    0    0    1    0    0    0    0
## 816    4    0    0    0    0    0    1    0    0    1    0    0    0    0
## 835    4    0    0    0    0    0    1    0    0    1    0    0    0    0
## 838    4    0    0    0    0    0    1    0    0    1    0    0    0    0
##     PO73 PO74 PO75 PO76 PO77 PO78 PO79 PO80 PO81 PO82 PO83 PO84 PO85 CLASS
## 42     0    0    0    0    0    0    0    2    0    0    1    0    0   Yes
## 46     0    0    0    0    0    0    0    1    0    0    0    0    0   Yes
## 58     0    0    0    0    0    0    1    1    0    0    0    0    0   Yes
## 98     0    0    0    0    0    0    0    0    0    0    0    0    0   Yes
## 99     0    0    0    0    0    0    0    0    0    0    0    0    0   Yes
## 128    0    0    0    0    0    0    0    1    0    0    0    0    0   Yes
## 150    0    0    0    0    0    0    0    0    0    0    0    0    0   Yes
## 151    0    0    0    0    0    0    0    0    0    0    0    0    0   Yes
## 175    0    0    0    0    0    0    0    1    0    0    0    0    0   Yes
## 180    0    0    0    0    0    0    0    1    0    1    0    0    0   Yes
## 195    0    0    0    0    0    0    0    0    0    0    0    0    0   Yes
## 204    0    0    0    0    0    0    0    1    0    0    0    0    0   Yes
## 227    0    0    0    0    0    0    0    1    0    0    0    0    0   Yes
## 235    0    0    0    0    0    0    0    0    0    0    0    0    0   Yes
## 236    0    0    0    0    0    0    0    0    0    0    0    0    0   Yes
## 247    0    0    0    0    0    0    0    1    0    0    0    0    0   Yes
## 249    0    0    0    0    0    0    0    0    0    0    0    0    0   Yes
## 254    1    0    0    0    0    0    1    0    0    0    0    0    0   Yes
## 282    0    0    0    0    0    0    0    1    0    0    0    0    1   Yes
## 298    0    0    0    0    0    0    0    1    0    0    0    0    0   Yes
## 314    0    0    0    0    0    0    0    0    0    0    0    0    0   Yes
## 319    0    0    0    0    0    0    0    1    0    0    0    0    0   Yes
## 336    0    0    0    0    0    0    0    0    0    0    0    0    0   Yes
## 337    0    0    0    0    0    0    0    2    0    0    0    0    1   Yes
## 357    0    0    0    0    0    0    0    1    0    0    0    0    0   Yes
## 401    0    0    0    0    0    0    0    1    0    0    0    0    0   Yes
## 404    0    0    0    0    0    0    0    1    0    0    0    0    0   Yes
## 427    0    0    0    2    0    0    0    1    0    0    0    0    0   Yes
## 440    0    0    0    0    0    0    0    0    0    0    0    0    0   Yes
## 445    0    0    0    0    0    1    0    1    0    0    0    0    0   Yes
## 481    0    0    0    0    0    0    0    0    0    0    0    0    0   Yes
## 505    0    0    0    0    0    0    0    0    0    0    0    0    0   Yes
## 511    0    0    0    0    0    0    0    1    0    0    0    0    0   Yes
## 553    0    0    0    0    0    0    0    1    0    0    0    0    0   Yes
## 561    0    0    0    0    0    0    0    2    0    0    0    0    0   Yes
## 621    0    0    0    0    0    0    0    1    0    0    0    0    0   Yes
## 627    0    0    0    0    0    0    0    0    0    1    0    0    0   Yes
## 662    0    0    0    0    0    0    0    1    0    0    0    0    0   Yes
## 670    0    0    0    0    0    0    0    0    0    1    0    0    0   Yes
## 671    0    0    0    0    0    0    0    1    0    0    0    0    0   Yes
## 685    0    0    0    0    0    0    0    1    0    0    0    0    0   Yes
## 705    0    0    0    2    0    0    0    1    0    0    0    0    0   Yes
## 763    0    0    0    0    0    0    0    1    1    1    0    1    0   Yes
## 771    0    0    0    0    0    0    0    1    0    0    0    0    0   Yes
## 774    0    0    0    0    0    0    0    1    0    0    0    0    0   Yes
## 800    0    0    0    0    0    0    0    1    0    0    0    0    0   Yes
## 812    0    0    0    0    0    0    0    1    0    0    0    0    0   Yes
## 816    0    0    0    0    0    0    0    1    0    0    0    0    0   Yes
## 835    0    0    0    0    0    0    0    1    0    0    0    0    0   Yes
## 838    0    0    0    0    0    0    0    1    0    0    0    0    0   Yes
##       STATUS
## 42  Learning
## 46  Learning
## 58  Learning
## 98  Learning
## 99  Learning
## 128 Learning
## 150 Learning
## 151 Learning
## 175 Learning
## 180 Learning
## 195 Learning
## 204 Learning
## 227 Learning
## 235 Learning
## 236 Learning
## 247 Learning
## 249 Learning
## 254 Learning
## 282 Learning
## 298 Learning
## 314 Learning
## 319 Learning
## 336 Learning
## 337 Learning
## 357 Learning
## 401 Learning
## 404 Learning
## 427 Learning
## 440 Learning
## 445 Learning
## 481 Learning
## 505 Learning
## 511 Learning
## 553 Learning
## 561 Learning
## 621 Learning
## 627 Learning
## 662 Learning
## 670 Learning
## 671 Learning
## 685 Learning
## 705 Learning
## 763 Learning
## 771 Learning
## 774 Learning
## 800 Learning
## 812 Learning
## 816 Learning
## 835 Learning
## 838 Learning
ggplot(data = y, aes(x = SD4)) +
  geom_histogram(breaks = seq(0, 6, by = 1), aes(fill = ..count..)) +
  scale_fill_gradient("Count", low = "green", high = "red") +
  ggtitle("Histogram of average age") +
  labs(x = "Age", y = "Count")

ncountType <- table(y$SD4)   # age ranges among the YES respondents
ncountType <- data.frame(ncountType)
DemoYesType <- data.frame(
  group = ncountType$Var1,
  value = ncountType$Freq
)
colors <- c('rgb(215,94,96)', 'rgb(122,132,133)', 'rgb(145,253,129)', 'rgb(175,105,87)', 'rgb(115,147,23)')  # RGB components kept in the valid 0-255 range

p <- plot_ly(DemoYesType, labels = ~group, values = ~value, type = 'pie',
             marker = list(colors = colors, line = list(color = '#FFFFFF', width = 1))) %>%
  layout(title = 'Customer age range')
p

Among the individuals who answered YES, the majority are between 40 and 50 years old.

ggplot(data = y, aes(x = SD1)) +
  geom_histogram(breaks = seq(0, 41, by = 1), aes(fill = ..count..)) +
  scale_fill_gradient("Count", low = "lightblue", high = "blue") +
  ggtitle("Histogram of SubType") +
  labs(x = "SubType", y = "Count")

{
  # count, for each customer subtype i, the number of YES answers
  d1 <- dataAssurance[, c(1, 86)]   # SD1 and CLASS
  f <- data.frame()
  for (i in seq(1, 41, 1)) {
    f[i, "Freq"] <- nrow(d1[which((d1$SD1 == i) & (d1$CLASS == "Yes")), ])
  }
  f
  f[,"Label"]=c("High Income, expensive child"
                ,"Very Important Provincials"
                ,"High status seniors"
                ,"Affluent senior apartments"
                ,"Mixed seniors"
                ,"Career and childcare"
                ,"Dinki's (double income no kids)"
                ,"Middle class families"
                ,"Modern, complete families"
                ,"Stable family"
                ,"Family starters"
                ,"Affluent young families"
                ,"Young all american family"
                ,"Junior cosmopolitan"
                ,"Senior cosmopolitans"
                ,"Students in apartments"
                ,"Fresh masters in the city"
                ,"Single youth"
                ,"Suburban youth"
                ,"Etnically diverse"
                ,"Young urban have-nots"
                ,"Mixed apartment dwellers"
                ,"Young and rising"
                ,"Young, low educated" 
                ,"Young seniors in the city"
                ,"Own home elderly"
                ,"Seniors in apartments"
                ,"Residential elderly"
                ,"Porchless seniors: no front yard"
                ,"Religious elderly singles"
                ,"Low income catholics"
                ,"Mixed seniors"
                ,"Lower class large families"
                ,"Large family, employed child"
                ,"Village families"
                ,"Couples with teens 'Married with children'"
                ,"Mixed small town dwellers"
                ,"Traditional families"
                ,"Large religous families"
                ,"Large family farms"
                ,"Mixed rurals")
  p <- plot_ly(f, labels = ~Label, values = ~Freq, type = 'pie', textinfo = 'label+percent') %>%
    layout(title = 'Pie chart of MOSTYPE customer subtype, CLASS = YES',
           xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
           yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE))
  
  p
}

Among the individuals who answered YES, the majority belong to the “Lower class large families” and “Middle class families” subtypes.
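
The same ranking can be obtained without the pie chart (a quick check on the YES subset y):

head(sort(table(y$SD1), decreasing = TRUE), 5)  # most frequent subtypes among YES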

ggplot(data = y, aes(x = SD5)) +
  geom_histogram(breaks = seq(0, 11, by = 1), aes(fill = ..count..)) +
  scale_fill_gradient("Count", low = "lightblue", high = "blue") +
  ggtitle("Histogram of MainType") +
  labs(x = "MainType", y = "Count")

Among the individuals who answered YES, the majority are of the “Family with grown ups” main type. The “Driven growers” and “Average family” types also stand out.

ggplot(data = y, aes(x = SD6)) +
  geom_histogram(breaks = seq(0, 11, by = 1), aes(fill = ..count..)) +
  scale_fill_gradient("Count", low = "lightblue", high = "blue") +
  ggtitle("Histogram of Roman Catholics") +
  labs(x = "% Roman Catholic", y = "Count")

Roughly 470 people categorised as 0–10% Roman Catholic answered yes to caravan insurance.
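
The exact counts behind this reading can be checked directly, assuming the lowest SD6 bins correspond to the 0–10% range:

table(y$SD6)  # counts per Roman Catholic percentage bin among YES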

The product-ownership attributes:

ggplot(data = y, aes(x = PO44)) +
  geom_histogram(breaks = seq(0, 11, by = 1), aes(fill = ..count..)) +
  scale_fill_gradient("Count", low = "yellow", high = "red") +
  ggtitle("Histogram of contribution to private third party insurance") +
  labs(x = "Contribution private third party insurance", y = "Count")

ncountType <- table(y$PO44)  # contribution levels among the YES respondents
ncountType <- data.frame(ncountType)
DemoYesType <- data.frame(
  group = ncountType$Var1,
  value = ncountType$Freq
)
colors <- c('rgb(215,94,96)', 'rgb(122,132,133)', 'rgb(145,253,129)', 'rgb(175,105,87)', 'rgb(115,147,23)')  # RGB components kept in the valid 0-255 range

p <- plot_ly(DemoYesType, labels = ~group, values = ~value, type = 'pie',
             marker = list(colors = colors, line = list(color = '#FFFFFF', width = 1))) %>%
  layout(title = 'Contribution to private third party insurance')
p

Among the individuals who answered YES to caravan insurance, most spend between 1 and 99 dollars on private third party insurance.

p <- plot_ly(alpha = 0.6) %>%
  add_histogram(x = ~dataAssurance[which(dataAssurance$CLASS == "No"), "PO47"], histnorm = "percent", name = "NO") %>%
  add_histogram(x = ~dataAssurance[which(dataAssurance$CLASS == "Yes"), "PO47"], histnorm = "percent", name = "YES") %>%
  layout(barmode = "overlay") %>%
  layout(title = "Contribution car policies")
p

50% of those who answered “no” to the offer do not even have car insurance, whereas 71% of those who answered “yes” spend between 1,000 and 4,999.
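
These percentages can also be reproduced numerically instead of being read off the overlaid histograms (a sketch using the same data):

# distribution of PO47 within each CLASS, as row percentages
round(100 * prop.table(table(dataAssurance$CLASS, dataAssurance$PO47), margin = 1), 1)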

p <- plot_ly(alpha = 0.6) %>%
  add_histogram(x = ~dataAssurance[which(dataAssurance$CLASS == "No"), "PO59"], histnorm = "percent", name = "NO") %>%
  add_histogram(x = ~dataAssurance[which(dataAssurance$CLASS == "Yes"), "PO59"], histnorm = "percent", name = "YES") %>%
  layout(barmode = "overlay") %>%
  layout(title = "Contribution fire policies")
p

46% of those who answered “no” to the offer have no fire insurance, versus 30.88% of those who answered “yes”. 51% of those who answered “yes” spend between 100 and 499.

Modelling

Splitting the dataset:

# keep columns 1:86 (drop STATUS) and split on the provided STATUS flag
data.train <- dataAssurance[which(dataAssurance$STATUS == "Learning"), 1:86]
data_assurance_apprentissage <- data.train
data.test <- dataAssurance[which(dataAssurance$STATUS == "Test"), 1:86]
data_assurance_test <- data.test
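
A quick check that the split matches the STATUS summary above (5822 learning rows, 4000 test rows) and that both parts keep the class imbalance:

dim(data.train); dim(data.test)
table(data.train$CLASS); table(data.test$CLASS)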

Classification models

Random Forest model

a <- c()

# test-set accuracy for mtry = 1..20 (500 trees each)
for (i in 1:20) {
  model1 <- randomForest(CLASS ~ ., data = data_assurance_apprentissage,
                         ntree = 500, mtry = i, importance = TRUE)
  predValid <- predict(model1, data_assurance_test, type = "class")
  a[i] <- mean(predValid == data_assurance_test$CLASS)
}

a
##  [1] 0.94050 0.94050 0.94125 0.94000 0.94075 0.93925 0.93900 0.93725
##  [9] 0.93825 0.93725 0.93650 0.93650 0.93575 0.93475 0.93550 0.93575
## [17] 0.93500 0.93600 0.93375 0.93375
plot(1:20,a)
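
The best mtry can be read off programmatically rather than from the plot; here the maximum accuracy (0.94125) is reached at mtry = 3, which motivates model2 below:

which.max(a)  # mtry value with the highest test accuracy
max(a)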

importance(model1)
##              No         Yes MeanDecreaseAccuracy MeanDecreaseGini
## SD1  19.8344605 -3.04590541           19.4981489     23.304174317
## SD2   2.8486833  4.59084910            4.4592976      2.830382381
## SD3   8.0128570 -5.03688044            7.1490101      5.307305191
## SD4   8.7172665 -1.98928381            8.1762365      7.469725247
## SD5  15.6782300 -4.43285286           15.4696838     12.239412150
## SD6   5.6825151 -2.03257987            5.2050807      6.265044437
## SD7  10.7893591 -2.79127318            9.5197596     12.305806344
## SD8   7.0429738 -1.89583441            6.4457719      9.162264596
## SD9   9.4000987 -0.99670475            9.2013982     12.661998776
## SD10 14.4769665 -2.40597307           14.0897216      8.929162886
## SD11  5.5650693 -1.61678457            5.1407484      5.641102082
## SD12 13.6493147 -6.51159660           12.5816430      7.335753435
## SD13 11.5607834 -4.80177453           10.7410733      8.647531329
## SD14  9.8096761 -4.87543261            8.4279160     11.388545147
## SD15 11.9119167 -6.27512949           10.4090507     11.398553116
## SD16 15.9332043 -5.09389088           15.4646416     11.262688853
## SD17 12.7766811 -1.33296236           12.6769367     12.249461610
## SD18 14.1172380 -3.71316685           14.0160319     11.880037625
## SD19  9.6237108 -2.10056016            9.3749628      9.886999493
## SD20 10.2107362 -2.91412750            9.4986553      6.262687909
## SD21  6.4160962 -3.54783253            5.8778654      3.666077280
## SD22 11.2096715  2.45194089           11.8755571     12.671769255
## SD23 13.5300746 -2.28121920           13.2942848     10.939841078
## SD24 14.8271435 -7.95220751           13.2930413     11.520765695
## SD25 11.9586359 -4.19989110           11.6364017      9.193745149
## SD26  8.6175856 -1.97612966            7.8612876      9.524059829
## SD27  9.8096267 -3.08585706            9.1533055      9.772124318
## SD28 12.7373574 -2.57548045           12.5851383     11.166975661
## SD29 10.1076117 -3.98254417            9.3393665      7.088393401
## SD30 11.9590243 -3.74496901           11.3306811      9.666230197
## SD31 11.0891138 -4.44715764           10.3011470      9.876335313
## SD32 12.7678082 -4.63979465           11.8994717      8.110502942
## SD33 10.7295694 -4.79694887            9.5318815      7.848120110
## SD34 14.0961678 -6.65144178           12.8313473      7.545057365
## SD35 10.5748655 -5.73927954            9.9463611      8.830854216
## SD36 10.6495412 -5.04658978            9.6905358      8.778889692
## SD37 13.4802188 -2.72202497           13.2966507      9.080845295
## SD38 13.0170943 -2.92308075           12.3791859     11.959694137
## SD39  8.0792509 -0.28453395            8.4273180     10.371374038
## SD40 10.9113994 -1.74619653           10.5752480      9.014683956
## SD41  3.7199940 -1.41271957            3.2991917      3.251849107
## SD42 12.0798192 -0.82927127           12.4285802      8.960862302
## SD43 12.1933052 -0.94881969           12.4017325     14.719203654
## PO44  7.1450772  1.84195618            7.7425094     14.359629020
## PO45 -0.1942939 -0.14295183           -0.1991731      1.347412345
## PO46  0.6047811  1.00100150            0.7021377      0.273463079
## PO47 -0.3924323 25.82590851            6.6384782     19.305681255
## PO48  2.5309334 -1.95349355            2.0947393      0.713853332
## PO49 -2.6468618  0.89109601           -2.2433266      4.365414114
## PO50  0.0000000  0.00000000            0.0000000      0.005692308
## PO51  3.6994523 -1.73382770            3.2526683      1.520843376
## PO52  2.6367574 -0.99657627            2.4807179      1.705551206
## PO53  0.0000000  0.00000000            0.0000000      0.017697436
## PO54 14.6818199 -5.05734114           13.4056479      4.447305328
## PO55 -3.5639894  0.55826750           -3.2282308      5.705469523
## PO56 -1.4169837  0.00000000           -1.4170079      0.100756496
## PO57 -0.7591322  0.73178418           -0.4543354      2.473222587
## PO58  5.8666048  5.38685833            7.2377779      1.957793310
## PO59  3.7169034  9.69239190            6.6531811     26.116618773
## PO60 -2.7309091  0.00000000           -2.7316895      0.334687849
## PO61 11.0287648 14.11262369           14.9188857      6.514909861
## PO62  6.1901912 -0.40699833            6.1017685      3.314635100
## PO63  4.5791146 -1.09473931            4.1920485      1.573270369
## PO64 -0.9149208 -0.02432593           -0.8758787      4.479027903
## PO65  7.5756105  0.22886164            7.4995849     10.310920757
## PO66 -1.9649312  0.50343160           -1.8261755      1.113525582
## PO67  1.1423075 -1.41666593            0.9309146      0.280494575
## PO68 -4.7424238 13.31953920           -0.5909349     18.607119527
## PO69  2.2510879 -1.91428393            1.8163889      0.635910198
## PO70 -1.3231355  0.45280012           -1.0314927      4.333924778
## PO71  0.0000000  0.00000000            0.0000000      0.013333333
## PO72  2.9657855 -1.10110210            2.7257511      1.286301926
## PO73  2.4494833 -1.76545909            2.0330419      0.943067799
## PO74  1.4170489  0.00000000            1.4170505      0.035663541
## PO75 11.9860755 -2.36509284           10.8275407      2.970738739
## PO76 -6.1817248  5.31414407           -4.3120843      8.053310645
## PO77 -0.3649460  0.00000000           -0.3665692      0.087747781
## PO78 -1.1993674  0.75416045           -0.8937447      1.474228854
## PO79  3.7639550  4.04152631            4.6580660      1.645438979
## PO80  2.9419581 -0.83247245            2.7447900      8.963574107
## PO81 -1.6777316  0.00000000           -1.6779174      0.230370854
## PO82 11.8089722 15.85863479           16.1689246      5.736106086
## PO83  2.9028249 -0.65494382            2.6982068      5.207567191
## PO84  4.4711537 -2.69305731            3.9260325      1.396539901
## PO85  0.9666416  1.73320163            1.4718062      3.510194181
varImpPlot(model1)
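The importance table above is easier to scan once sorted. A minimal sketch (assuming model1 was fitted with importance = TRUE, as the four-column output suggests):

# Rank predictors by mean decrease in Gini and keep the ten strongest
imp <- importance(model1)
head(imp[order(imp[, "MeanDecreaseGini"], decreasing = TRUE), ], 10)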

model2 <- randomForest(CLASS ~ ., data = data_assurance_apprentissage, ntree = 500, mtry = 3, importance = TRUE)

predValid <- predict(model2, data_assurance_test, type = "class")

mean(predValid == data_assurance_test$CLASS)
## [1] 0.94075
table(predictions=predValid,actual=data_assurance_test$CLASS)
##            actual
## predictions   No  Yes
##         No  3762  237
##         Yes    0    1
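With only one of the 238 actual "Yes" clients recovered, raw accuracy says little here. A hedged sketch of a fuller evaluation with caret, treating "Yes" as the positive class:

# Sensitivity and specificity for the minority class, not just accuracy
caret::confusionMatrix(predValid, data_assurance_test$CLASS, positive = "Yes")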

Random Forest model with cross-validation:

data.train$CLASS <- as.factor(data.train$CLASS)
data.test$CLASS <- as.factor(data.test$CLASS)
set.seed(123)

grid <- expand.grid(mtry=c(2,3,4))


modelKF <- caret::train(CLASS ~.,
                 data=data.train,
                 method= "rf",
                 trControl= trainControl(method = "cv", number = 3, savePredictions = TRUE),
                 tuneGrid=grid,
                 preProcess=c('center','scale'))
predictionsKF<-predict(modelKF,data.test,type = "prob")
pred <- as.data.frame(predictionsKF)
pred1 <- prediction(pred[,2],data.test$CLASS)
perfrfp=performance(pred1,"tpr", "fpr")
plot(perfrfp,colorize = TRUE)

perf <- performance(pred1, "auc")
perf@y.values[[1]]
## [1] 0.6944606
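To see which mtry the 3-fold cross-validation retained, caret stores the winning parameters and the per-candidate results:

# Winning tuning parameters and the full resampling results
modelKF$bestTune
modelKF$results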

Random Forest model with bootstrap resampling:

train_control_bootstrap <- trainControl(method="boot", number=10)
# train the model
modelBoot <- caret::train(CLASS~., data=data_assurance_apprentissage, trControl=train_control_bootstrap, method="rf")

predValid_boot <- predict(modelBoot, data_assurance_test, type = "raw")

table(predictions=predValid_boot,actual=data_assurance_test$CLASS)
##            actual
## predictions   No  Yes
##         No  3762  238
##         Yes    0    0
# summarize results
print(modelBoot)
## Random Forest 
## 
## 5822 samples
##   85 predictor
##    2 classes: 'No', 'Yes' 
## 
## No pre-processing
## Resampling: Bootstrapped (10 reps) 
## Summary of sample sizes: 5822, 5822, 5822, 5822, 5822, 5822, ... 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa        
##    2    0.9408737  -0.0003698767
##   43    0.9227213   0.0472691447
##   85    0.9192118   0.0475959873
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.
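A Kappa near zero at the selected mtry = 2 means the model gains almost nothing over always predicting "No". One possible remedy, sketched here with caret's built-in down-sampling (an assumption on our part, not part of the original pipeline):

# Re-train with the majority class down-sampled inside each bootstrap resample
train_control_down <- trainControl(method = "boot", number = 10, sampling = "down")
modelBootDown <- caret::train(CLASS ~ ., data = data_assurance_apprentissage,
                              trControl = train_control_down, method = "rf")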

AdaBoost model

adaboost<-boosting(CLASS ~ .,data = data.train, boos=TRUE, mfinal=20,coeflearn='Breiman')
summary(adaboost)
##            Length Class   Mode     
## formula        3  formula call     
## trees         20  -none-  list     
## weights       20  -none-  numeric  
## votes      11644  -none-  numeric  
## prob       11644  -none-  numeric  
## class       5822  -none-  character
## importance    85  -none-  numeric  
## terms          3  terms   call     
## call           6  -none-  call
errorevol(adaboost,data.train)
## $error
##  [1] 0.06011680 0.06011680 0.06011680 0.06080385 0.06097561 0.06028856
##  [7] 0.05925799 0.06011680 0.05942975 0.05977327 0.05925799 0.06011680
## [13] 0.05891446 0.05960151 0.05908622 0.05925799 0.05960151 0.06046032
## [19] 0.05942975 0.05908622
## 
## attr(,"class")
## [1] "errorevol"
predValid_adab<-predict(adaboost,data.test)

mean(predValid_adab$class == data.test$CLASS)
## [1] 0.93825
table(predictions=predValid_adab$class,actual=data.test$CLASS)
##            actual
## predictions   No  Yes
##         No  3742  227
##         Yes   20   11
t1<-adaboost$trees[[1]]

rpart.plot(t1, box.palette="RdBu", shadow.col="gray", nn=TRUE,roundint=FALSE)
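The summary above shows an importance vector of length 85; sorting it identifies which variables drive the boosted ensemble (a sketch using adabag's accessors):

# Top ten variables by adabag's importance measure
sort(adaboost$importance, decreasing = TRUE)[1:10]
importanceplot(adaboost)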

Decision tree with SMOTE:

library(DMwR)
classTree <- DMwR::SMOTE(CLASS~.,data.train,k = 5,perc.over = 600,perc.under = 100,learner = "rpart")
tree.prediction<- predict(classTree,data.test, type="class")
prp(classTree)

rpart.plot(classTree, box.palette="RdBu", shadow.col="gray", nn=TRUE,roundint=FALSE)
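tree.prediction is computed above but never scored. A minimal sketch, mirroring the evaluation used for the other models:

# Accuracy and confusion table of the SMOTE-trained tree on the test set
mean(tree.prediction == data.test$CLASS)
table(predictions = tree.prediction, actual = data.test$CLASS)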

LDA model (linear discriminant analysis):

### LDA
# Fit the linear discriminant model on the training set and score the test set
m1 <- lda(CLASS ~ ., data.train)
v <- predict(m1, data.test)
tablin <- table(data.test$CLASS, v$class)
# Misclassification error over the full test set
errlin <- (nrow(data.test) - sum(diag(tablin))) / nrow(data.test)
errlin
## [1] 0.065

SVM model

library(e1071) 
modelsvm <- svm(CLASS ~ ., data = data.train, type = "C-classification", kernel = "radial",
                cost = 10, gamma = 0.1, cross = 0, fitted = TRUE, probability = TRUE)
modelsvm
## 
## Call:
## svm(formula = CLASS ~ ., data = data.train, type = "C-classification", 
##     kernel = "radial", cost = 10, gamma = 0.1, cross = 0, fitted = TRUE, 
##     probability = TRUE)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  10 
##       gamma:  0.1 
## 
## Number of Support Vectors:  3733
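cost = 10 and gamma = 0.1 were fixed by hand, and 3733 support vectors out of 5822 training points suggests the fit is far from sparse. A hedged sketch of a grid search with e1071's built-in tuner (the grid values are illustrative, not the report's):

# Cross-validated grid search over cost and gamma
tuned <- tune.svm(CLASS ~ ., data = data.train,
                  gamma = c(0.01, 0.1, 1), cost = c(1, 10, 100))
summary(tuned)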

ROC curves

table(v$class, data.test$CLASS)
##      
##         No  Yes
##   No  3731  229
##   Yes   31    9
plot(v$x) # make a scatterplot
text(v$x,m1$lev,cex=0.7,pos=4,col=c("red","blue")) # add labels

predi <- prediction(v$x,data.test$CLASS)
perfldap=performance(predi,"tpr", "fpr")
plot(perfldap,colorize = TRUE)

perf <- performance(predi, "auc")
perf@y.values[[1]]
## [1] 0.7246525

SVM predictions

presvm <- predict(modelsvm, data.test)  # class labels; predict.svm has no type = "prob" argument
table(presvm,data.test$CLASS)
##       
## presvm   No  Yes
##    No  3703  225
##    Yes   59   13

Overlaying the ROC curves to evaluate and compare the models

plot(perfrfp, col = "red")
par(new = TRUE)
plot(perfldap, col = "blue")
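presvm holds class labels, so it cannot be drawn as a ROC curve directly. Since modelsvm was fitted with probability = TRUE, the class probabilities can be recovered and the SVM curve added to the overlay (a sketch; the "Yes" column name follows the CLASS levels used throughout):

# Recover per-class probabilities from the SVM and overlay its ROC curve
presvm_prob <- predict(modelsvm, data.test, probability = TRUE)
probsvm <- attr(presvm_prob, "probabilities")[, "Yes"]
predsvm <- prediction(probsvm, data.test$CLASS)
perfsvmp <- performance(predsvm, "tpr", "fpr")
plot(perfsvmp, col = "yellow", add = TRUE)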

Linear regression

data_assurance_apprentissage$CLASS<-as.numeric(data_assurance_apprentissage$CLASS)-1
data_assurance_test$CLASS<-as.numeric(data_assurance_test$CLASS)-1
ModelLinear <- lm(CLASS ~ ., data = data_assurance_apprentissage)
#ModelLinear$coefficients
plot(ModelLinear)
## Warning: not plotting observations with leverage one:
##   4034

## Warning: not plotting observations with leverage one:
##   4034

hist(residuals(ModelLinear))

predValidModelLinear <- predict(ModelLinear, newdata = data_assurance_test)
plot(predValidModelLinear)

library(MASS)
step<-stepAIC(ModelLinear,direction="both",trace = F)
plot(step)

hist(residuals(step))

#summary(step)
#step$coefficients
predValidStep=predict(step,data_assurance_test)
plot(predValidStep)
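The lm and stepAIC fits return continuous scores, so comparing them with the classifiers requires a cutoff. A minimal sketch (the 0.5 threshold is an illustrative choice, not the report's):

# Threshold the stepwise model's scores into 0/1 classes
predClassStep <- ifelse(predValidStep > 0.5, 1, 0)
table(predictions = predClassStep, actual = data_assurance_test$CLASS)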

Logistic regression

ModelLR <- glm(CLASS ~ ., data = data_assurance_apprentissage, family = "binomial")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
ModelLR
## 
## Call:  glm(formula = CLASS ~ ., family = "binomial", data = data_assurance_apprentissage)
## 
## Coefficients:
## (Intercept)          SD1          SD2          SD3          SD4  
##   2.542e+02    6.580e-02   -1.832e-01   -2.696e-02    2.096e-01  
##         SD5          SD6          SD7          SD8          SD9  
##  -2.767e-01   -1.142e-01   -1.910e-02   -1.618e-02   -6.817e-02  
##        SD10         SD11         SD12         SD13         SD14  
##   2.310e-01    8.509e-02    1.467e-01   -8.291e-02   -1.154e-01  
##        SD15         SD16         SD17         SD18         SD19  
##  -8.140e-02    9.717e-04   -9.077e-02   -1.994e-01    8.883e-02  
##        SD20         SD21         SD22         SD23         SD24  
##   3.918e-02   -1.169e-01    1.353e-01    3.976e-02    9.954e-02  
##        SD25         SD26         SD27         SD28         SD29  
##   2.690e-02   -8.801e-03    1.200e-02    9.016e-02   -2.468e-02  
##        SD30         SD31         SD32         SD33         SD34  
##  -1.472e+01   -1.469e+01    1.819e-01    1.507e-01    9.325e-02  
##        SD35         SD36         SD37         SD38         SD39  
##  -1.445e+01   -1.451e+01    1.181e-01    1.366e-01    1.009e-01  
##        SD40         SD41         SD42         SD43         PO44  
##   1.144e-01   -1.607e-01    9.214e-02    6.856e-02    5.954e-01  
##        PO45         PO46         PO47         PO48         PO49  
##  -2.757e-01   -4.405e-01    2.306e-01    1.215e+01   -8.101e-02  
##        PO50         PO51         PO52         PO53         PO54  
##  -2.106e+00    1.014e+00    7.229e-01   -5.525e+00    2.170e-01  
##        PO55         PO56         PO57         PO58         PO59  
##  -2.382e-01   -4.523e-01    1.444e+00    8.239e-01    2.401e-01  
##        PO60         PO61         PO62         PO63         PO64  
##  -8.658e+00   -1.886e-01    3.664e-01   -1.068e+00   -1.676e-01  
##        PO65         PO66         PO67         PO68         PO69  
##  -9.293e-01    4.197e-01    2.762e-01   -3.902e-02   -7.298e+01  
##        PO70         PO71         PO72         PO73         PO74  
##   2.418e-01   -4.490e+00   -1.351e+00   -2.376e+00   -8.749e-01  
##        PO75         PO76         PO77         PO78         PO79  
##  -1.060e+00    4.789e-01    3.997e-01   -3.163e+00   -3.212e+00  
##        PO80         PO81         PO82         PO83         PO84  
##  -4.118e-01    1.047e+01    2.516e+00    2.318e-01    1.947e+00  
##        PO85  
##   1.078e+00  
## 
## Degrees of Freedom: 5821 Total (i.e. Null);  5736 Residual
## Null Deviance:       2636 
## Residual Deviance: 2243  AIC: 2415
plot(ModelLR)
## Warning: not plotting observations with leverage one:
##   4034

## Warning: not plotting observations with leverage one:
##   4034

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

predValidModelLR <- predict(ModelLR, data_assurance_test)
max(predValidModelLR)
## [1] 2.631314
plot(predValidModelLR)
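predict.glm returns values on the link scale by default, which is why the maximum exceeds 1: these are log-odds, not probabilities. A sketch of the response-scale version (the 0.5 cutoff is again illustrative):

# Predicted probabilities rather than log-odds
probLR <- predict(ModelLR, data_assurance_test, type = "response")
max(probLR)  # now bounded by 1
table(predictions = ifelse(probLR > 0.5, 1, 0), actual = data_assurance_test$CLASS)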

Deep learning: TensorFlow

response <- function() "CLASS"
data =dataAssurance
features <- function() setdiff(names(data), response())
set.seed(123)
index = seq(1,max(which(data$STATUS=="Learning")))
data$STATUS<-NULL
datatrain = data[ index, ]
datatest = data[ -index, ]
data_train <- as.data.frame(datatrain)
data_test  <- as.data.frame(datatest)
feature_columns <- feature_columns(
  column_numeric(features())
)
classifier <- dnn_classifier(
  feature_columns = feature_columns,
  hidden_units = c(30,20,10,5),
  n_classes = 2
)
data_input_fn <- function(data) {
  input_fn(data, features = features(), response = response())
}
training_history <- train(classifier, input_fn = data_input_fn(data_train))
plot(training_history)

predictions <- predict(classifier, input_fn = data_input_fn(data_test))
evaluation <- evaluate(classifier, input_fn = data_input_fn(data_test))
summary(evaluation)
##     accuracy      accuracy_baseline      auc         auc_precision_recall
##  Min.   :0.9405   Min.   :0.9405    Min.   :0.6803   Min.   :0.1257      
##  1st Qu.:0.9405   1st Qu.:0.9405    1st Qu.:0.6803   1st Qu.:0.1257      
##  Median :0.9405   Median :0.9405    Median :0.6803   Median :0.1257      
##  Mean   :0.9405   Mean   :0.9405    Mean   :0.6803   Mean   :0.1257      
##  3rd Qu.:0.9405   3rd Qu.:0.9405    3rd Qu.:0.6803   3rd Qu.:0.1257      
##  Max.   :0.9405   Max.   :0.9405    Max.   :0.6803   Max.   :0.1257      
##   average_loss      label/mean          loss         precision
##  Min.   :0.2251   Min.   :0.0595   Min.   :28.14   Min.   :0  
##  1st Qu.:0.2251   1st Qu.:0.0595   1st Qu.:28.14   1st Qu.:0  
##  Median :0.2251   Median :0.0595   Median :28.14   Median :0  
##  Mean   :0.2251   Mean   :0.0595   Mean   :28.14   Mean   :0  
##  3rd Qu.:0.2251   3rd Qu.:0.0595   3rd Qu.:28.14   3rd Qu.:0  
##  Max.   :0.2251   Max.   :0.0595   Max.   :28.14   Max.   :0  
##  prediction/mean       recall   global_step
##  Min.   :0.03245   Min.   :0   Min.   :46  
##  1st Qu.:0.03245   1st Qu.:0   1st Qu.:46  
##  Median :0.03245   Median :0   Median :46  
##  Mean   :0.03245   Mean   :0   Mean   :46  
##  3rd Qu.:0.03245   3rd Qu.:0   3rd Qu.:46  
##  Max.   :0.03245   Max.   :0   Max.   :46
plot(evaluation)

The evaluation summary makes the imbalance problem explicit: accuracy (0.9405) equals accuracy_baseline, and precision and recall are both 0, so the network classifies every client as "No". With only about 6% positive labels (label/mean = 0.0595), this is the same majority-class behaviour already observed with the random forest models.